Open necessary datasets
cwurData <- read.csv("cwurData.csv")
educationExpenditure <- read.csv("education_expenditure_supplementary_data.csv")
educationalAttainment <- read.csv("educational_attainment_supplementary_data.csv")
schoolCountry <- read.csv("school_and_country_table.csv")
shanghaiData <- read.csv("shanghaiData.csv")
timesData <- read.csv("timesData.csv")
knitr::kable(head(cwurData,10), caption = "Central World University Rankings information (first 10 rows)")
Central World University Rankings information (first 10
rows)
| 1 |
Harvard University |
USA |
1 |
7 |
9 |
1 |
1 |
1 |
1 |
NA |
5 |
100.00 |
2012 |
| 2 |
Massachusetts Institute of Technology |
USA |
2 |
9 |
17 |
3 |
12 |
4 |
4 |
NA |
1 |
91.67 |
2012 |
| 3 |
Stanford University |
USA |
3 |
17 |
11 |
5 |
4 |
2 |
2 |
NA |
15 |
89.50 |
2012 |
| 4 |
University of Cambridge |
United Kingdom |
1 |
10 |
24 |
4 |
16 |
16 |
11 |
NA |
50 |
86.17 |
2012 |
| 5 |
California Institute of Technology |
USA |
4 |
2 |
29 |
7 |
37 |
22 |
22 |
NA |
18 |
85.21 |
2012 |
| 6 |
Princeton University |
USA |
5 |
8 |
14 |
2 |
53 |
33 |
26 |
NA |
101 |
82.50 |
2012 |
| 7 |
University of Oxford |
United Kingdom |
2 |
13 |
28 |
9 |
15 |
13 |
19 |
NA |
26 |
82.34 |
2012 |
| 8 |
Yale University |
USA |
6 |
14 |
31 |
12 |
14 |
6 |
15 |
NA |
66 |
79.14 |
2012 |
| 9 |
Columbia University |
USA |
7 |
23 |
21 |
10 |
13 |
12 |
14 |
NA |
5 |
78.86 |
2012 |
| 10 |
University of California, Berkeley |
USA |
8 |
16 |
52 |
6 |
6 |
5 |
3 |
NA |
16 |
78.55 |
2012 |
knitr::kable(head(shanghaiData,10),caption="Shanghai Ranking information (first 10 rows)")
Shanghai Ranking information (first 10 rows)
| 1 |
Harvard University |
1 |
100.0 |
100.0 |
100.0 |
100.0 |
100.0 |
100.0 |
72.4 |
2005 |
| 2 |
University of Cambridge |
1 |
73.6 |
99.8 |
93.4 |
53.3 |
56.6 |
70.9 |
66.9 |
2005 |
| 3 |
Stanford University |
2 |
73.4 |
41.1 |
72.2 |
88.5 |
70.9 |
72.3 |
65.0 |
2005 |
| 4 |
University of California, Berkeley |
3 |
72.8 |
71.8 |
76.0 |
69.4 |
73.9 |
72.2 |
52.7 |
2005 |
| 5 |
Massachusetts Institute of Technology (MIT) |
4 |
70.1 |
74.0 |
80.6 |
66.7 |
65.8 |
64.3 |
53.0 |
2005 |
| 6 |
California Institute of Technology |
5 |
67.1 |
59.2 |
68.6 |
59.8 |
65.8 |
52.5 |
100.0 |
2005 |
| 7 |
Columbia University |
6 |
62.3 |
79.4 |
60.6 |
56.1 |
54.2 |
69.5 |
45.4 |
2005 |
| 8 |
Princeton University |
7 |
60.9 |
63.4 |
76.8 |
60.9 |
48.7 |
48.5 |
59.1 |
2005 |
| 9 |
University of Chicago |
8 |
60.1 |
75.6 |
81.9 |
50.3 |
44.7 |
56.4 |
42.2 |
2005 |
| 10 |
University of Oxford |
2 |
59.7 |
64.3 |
59.1 |
48.4 |
55.6 |
68.4 |
53.2 |
2005 |
knitr::kable(head(educationalAttainment,10),caption="Education attainment information (first 10 rows)")
Education attainment information (first 10 rows)
| Afghanistan |
Barro-Lee: Average years of primary schooling, age 15+,
female |
0.33 |
NA |
NA |
0.44 |
NA |
NA |
NA |
0.57 |
NA |
NA |
NA |
NA |
0.75 |
NA |
NA |
NA |
NA |
0.86 |
NA |
NA |
NA |
NA |
1.27 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age 15+,
total |
1.03 |
NA |
NA |
1.26 |
NA |
NA |
NA |
1.54 |
NA |
NA |
NA |
NA |
2.01 |
NA |
NA |
NA |
NA |
2.18 |
NA |
NA |
NA |
NA |
2.64 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age
15-19, female |
0.83 |
NA |
NA |
0.95 |
NA |
NA |
NA |
1.26 |
NA |
NA |
NA |
NA |
1.92 |
NA |
NA |
NA |
NA |
1.01 |
NA |
NA |
NA |
NA |
2.45 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age
15-19, total |
2.34 |
NA |
NA |
2.22 |
NA |
NA |
NA |
2.37 |
NA |
NA |
NA |
NA |
3.83 |
NA |
NA |
NA |
NA |
2.26 |
NA |
NA |
NA |
NA |
3.55 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age
20-24, female |
0.54 |
NA |
NA |
0.92 |
NA |
NA |
NA |
0.94 |
NA |
NA |
NA |
NA |
1.26 |
NA |
NA |
NA |
NA |
2.00 |
NA |
NA |
NA |
NA |
1.29 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age
20-24, total |
1.52 |
NA |
NA |
2.51 |
NA |
NA |
NA |
2.27 |
NA |
NA |
NA |
NA |
2.48 |
NA |
NA |
NA |
NA |
3.93 |
NA |
NA |
NA |
NA |
2.64 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age 25+,
female |
0.17 |
NA |
NA |
0.25 |
NA |
NA |
NA |
0.37 |
NA |
NA |
NA |
NA |
0.48 |
NA |
NA |
NA |
NA |
0.63 |
NA |
NA |
NA |
NA |
0.81 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age 25+,
total |
0.66 |
NA |
NA |
0.85 |
NA |
NA |
NA |
1.14 |
NA |
NA |
NA |
NA |
1.38 |
NA |
NA |
NA |
NA |
1.69 |
NA |
NA |
NA |
NA |
2.19 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age
25-29, female |
0.44 |
NA |
NA |
0.54 |
NA |
NA |
NA |
0.92 |
NA |
NA |
NA |
NA |
0.94 |
NA |
NA |
NA |
NA |
1.26 |
NA |
NA |
NA |
NA |
1.92 |
NA |
NA |
NA |
NA |
| Afghanistan |
Barro-Lee: Average years of primary schooling, age
25-29, total |
1.28 |
NA |
NA |
1.52 |
NA |
NA |
NA |
2.51 |
NA |
NA |
NA |
NA |
2.27 |
NA |
NA |
NA |
NA |
2.48 |
NA |
NA |
NA |
NA |
3.93 |
NA |
NA |
NA |
NA |
knitr::kable(head(educationExpenditure,10),caption="Education expenditure information (first 10 rows)")
Education expenditure information (first 10 rows)
| OECD Average |
All Institutions |
Public |
4.9 |
4.9 |
5.0 |
5.4 |
5.4 |
5.3 |
| Australia |
All Institutions |
Public |
4.5 |
4.6 |
4.3 |
4.5 |
4.6 |
4.3 |
| Austria |
All Institutions |
Public |
5.3 |
5.4 |
5.2 |
5.7 |
5.6 |
5.5 |
| Belgium |
All Institutions |
Public |
5.0 |
5.1 |
5.8 |
6.4 |
6.4 |
6.4 |
| Canada |
All Institutions |
Public |
5.8 |
5.2 |
4.8 |
5.0 |
5.2 |
NA |
| Chile |
All Institutions |
Public |
NA |
4.2 |
3.3 |
4.1 |
4.3 |
3.9 |
| Czech Republic |
All Institutions |
Public |
4.8 |
4.2 |
4.1 |
4.2 |
4.1 |
4.4 |
| Denmark |
All Institutions |
Public |
6.5 |
6.4 |
6.8 |
7.5 |
7.6 |
7.5 |
| Estonia |
All Institutions |
Public |
NA |
NA |
4.7 |
5.9 |
5.6 |
5.2 |
| Finland |
All Institutions |
Public |
6.6 |
5.5 |
5.9 |
6.3 |
6.4 |
6.3 |
knitr::kable(head(timesData,10),caption="Times Higher Education World University Rankings data information (first 10 rows)")
Times Higher Education World University Rankings data
information (first 10 rows)
| 1 |
Harvard University |
United States of America |
99.7 |
72.4 |
98.7 |
98.8 |
34.5 |
96.1 |
20,152 |
8.9 |
25% |
|
2011 |
| 2 |
California Institute of Technology |
United States of America |
97.7 |
54.6 |
98.0 |
99.9 |
83.7 |
96.0 |
2,243 |
6.9 |
27% |
33 : 67 |
2011 |
| 3 |
Massachusetts Institute of Technology |
United States of America |
97.8 |
82.3 |
91.4 |
99.9 |
87.5 |
95.6 |
11,074 |
9.0 |
33% |
37 : 63 |
2011 |
| 4 |
Stanford University |
United States of America |
98.3 |
29.5 |
98.1 |
99.2 |
64.3 |
94.3 |
15,596 |
7.8 |
22% |
42 : 58 |
2011 |
| 5 |
Princeton University |
United States of America |
90.9 |
70.3 |
95.4 |
99.9 |
- |
94.2 |
7,929 |
8.4 |
27% |
45 : 55 |
2011 |
| 6 |
University of Cambridge |
United Kingdom |
90.5 |
77.7 |
94.1 |
94.0 |
57.0 |
91.2 |
18,812 |
11.8 |
34% |
46 : 54 |
2011 |
| 6 |
University of Oxford |
United Kingdom |
88.2 |
77.2 |
93.9 |
95.1 |
73.5 |
91.2 |
19,919 |
11.6 |
34% |
46 : 54 |
2011 |
| 8 |
University of California, Berkeley |
United States of America |
84.2 |
39.6 |
99.3 |
97.8 |
- |
91.1 |
36,186 |
16.4 |
15% |
50 : 50 |
2011 |
| 9 |
Imperial College London |
United Kingdom |
89.2 |
90.0 |
94.5 |
88.3 |
92.9 |
90.6 |
15,060 |
11.7 |
51% |
37 : 63 |
2011 |
| 10 |
Yale University |
United States of America |
92.1 |
59.2 |
89.7 |
91.5 |
- |
89.5 |
11,751 |
4.4 |
20% |
50 : 50 |
2011 |
knitr::kable(head(schoolCountry,10),caption="School & country information (first 10 rows)")
School & country information (first 10 rows)
| Harvard University |
United States of America |
| California Institute of Technology |
United States of America |
| Massachusetts Institute of Technology |
United States of America |
| Stanford University |
United States of America |
| Princeton University |
United States of America |
| University of Cambridge |
United Kingdom |
| University of Oxford |
United Kingdom |
| University of California, Berkeley |
United States of America |
| Imperial College London |
United Kingdom |
| Yale University |
United States of America |
glimpse(cwurData)
## Rows: 2,200
## Columns: 14
## $ world_rank <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15~
## $ institution <chr> "Harvard University", "Massachusetts Institute of~
## $ country <chr> "USA", "USA", "USA", "United Kingdom", "USA", "US~
## $ national_rank <int> 1, 2, 3, 1, 4, 5, 2, 6, 7, 8, 9, 10, 11, 1, 12, 1~
## $ quality_of_education <int> 7, 9, 17, 10, 2, 8, 13, 14, 23, 16, 15, 21, 31, 3~
## $ alumni_employment <int> 9, 17, 11, 24, 29, 14, 28, 31, 21, 52, 26, 42, 16~
## $ quality_of_faculty <int> 1, 3, 5, 4, 7, 2, 9, 12, 10, 6, 8, 14, 24, 31, 20~
## $ publications <int> 1, 12, 4, 16, 37, 53, 15, 14, 13, 6, 34, 22, 9, 8~
## $ influence <int> 1, 4, 2, 16, 22, 33, 13, 6, 12, 5, 20, 21, 10, 19~
## $ citations <int> 1, 4, 2, 11, 22, 26, 19, 15, 14, 3, 28, 16, 8, 23~
## $ broad_impact <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N~
## $ patents <int> 5, 1, 15, 50, 18, 101, 26, 66, 5, 16, 101, 10, 9,~
## $ score <dbl> 100.00, 91.67, 89.50, 86.17, 85.21, 82.50, 82.34,~
## $ year <int> 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2012, 2~
Review and reveal interesting facts
cwurData %>% group_by(year) %>%
select(year,institution,world_rank) %>% top_n(-5, wt = world_rank) -> cwurTop5
plot_ly(cwurTop5, x = ~year) %>%
add_trace(y = cwurTop5$world_rank, name = cwurTop5$institution, showlegend=TRUE, type = 'scatter', mode = 'lines+markers', color= cwurTop5$institution) %>%
layout(title="World Ranked Universities by CWUR (2012-2015)",
xaxis = list(showticklabels = TRUE, tickangle = 0, tickfont = list(size = 8)),
yaxis = list(title = "World rank"),
hovermode = 'compare')
cwurPlotYear <- function(nYear) {
cwurData %>% filter(year==nYear) %>% top_n(10,-world_rank) %>%
ggplot(aes(x=reorder(institution,-world_rank), y=world_rank)) + geom_bar(stat="identity", aes(fill=reorder(institution,-world_rank)), colour="black") +
theme_bw() + coord_flip() + scale_fill_manual(values=c(rep("lightgreen",7), "#CD7F32", "grey", "gold")) + guides(fill=FALSE) +
labs(x="Institution", y="World Rank",
title=paste("Rank in ",nYear), subtitle="(smaller value is better)")
}
cwurPlotYear(2012) -> d1
cwurPlotYear(2013) -> d2
cwurPlotYear(2014) -> d3
cwurPlotYear(2015) -> d4
grid.arrange(d1,d2,d3,d4, ncol=2)

cwurData %>% group_by(country) %>% summarise(n = length(publications)) %>% top_n(10,n) %>% ungroup() -> c
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=publications, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by publication",
title="Rank by publication", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d1
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=citations, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by citations",
title="Rank by citations", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d2
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=patents, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by patents",
title="Rank by patents", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d3
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=quality_of_education, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by quality of education",
title="Rank by quality of education", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d4
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=alumni_employment, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by alumni employment",
title="Rank by alumni employment", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d5
cwurData %>% filter(country %in% c$country) %>%
ggplot(aes(x=country, y=quality_of_faculty, col=country)) + guides(col=FALSE) +
geom_boxplot() + theme_bw() + coord_flip() +
labs(x="Country", y="Rank by quality of faculty",
title="Rank by quality of faculty", subtitle="Grouped by country, smaller value is better") + theme(text = element_text(size = 20)) -> d6
grid.arrange(d1,d2,d3,d4,d5,d6, ncol=2)

cwurData %>% group_by(country,year) %>%
summarise(nr = length(world_rank), minw=min(world_rank), maxw=max(world_rank), avgw=round(mean(world_rank),0)) %>%
select(country, year, nr, minw, maxw, avgw) %>% ungroup() -> ccwur
# light grey boundaries
l <- list(color = toRGB("grey"), width = 0.5)
ccwur$hover <- with(ccwur,
paste("Country: ", country, '<br>',
"Year: ",year, "<br>",
"Universities in top: ", nr, "<br>",
"Min rank in top: ", minw, "<br>",
"Max rank in top: ", maxw, "<br>",
"Mean rank in top: ", avgw,"<br>"
))
g <- list(
showframe = TRUE,
showcoastlines = TRUE,
projection = list(type = 'orthogonal')
)
plot_geo(ccwur, locationmode = 'country names') %>%
add_trace(
z = ~nr, color = ~nr, colors = 'Spectral', frame = ~year,
text = ~hover, locations=~country, marker = list(line = l)
) %>%
colorbar(title = 'Number of\nuniversities in top', tickprefix = '') %>%
layout(
title = with(ccwur, paste('Number of universities in top<br>Source:<a href="http://cwur.org/">Council of World University Ranking</a>')),
geo = g
)